{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = 'all'  # default is ‘last_expr’\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from random import sample\n",
    "from collections import defaultdict\n",
    "from datetime import datetime\n",
    "import uuid\n",
    "\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There are some Excel spreadsheet with species labels, but the labels weren't made with the path to the image in the entry but just the location and timestamp, so too hard to recover. So here, we just try getting a list of all species, and then if the species name is in the path to an image, that image will be labeled with that species."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get available species labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/Source/temp_data/CameraTrap/engagements/BNF/20190624and0815/8714_images.json') as f:\n",
    "    image_paths_original = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "image_paths_original = [i for i in image_paths_original if i.lower().endswith('.jpg')]\n",
    "len(image_paths_original)\n",
    "sample(image_paths_original, 5)\n",
    "image_paths = [i.lower() for i in image_paths_original if i.lower().endswith('.jpg')]\n",
    "len(image_paths)\n",
    "sample(image_paths, 5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Getting a list of species\n",
    "Only ran once to grab the majority of species."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "folder_images = []\n",
    "for p in image_paths:\n",
    "    if p.startswith('20190624cameratraps/images/kutai 1'):\n",
    "        folder_images.append(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "361"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(folder_images)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "species_sample = set()\n",
    "\n",
    "for i in folder_images:\n",
    "    species_sample.add(i.split('/')[-2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'argus pheasant',\n",
       " 'banded palm civet',\n",
       " 'banteng',\n",
       " 'bay cat',\n",
       " 'bearded pig',\n",
       " 'bornean red muntjac',\n",
       " 'clouded leopard',\n",
       " 'crested fireback',\n",
       " 'kutai 1',\n",
       " 'macaques',\n",
       " 'malay civet',\n",
       " 'marbled cat',\n",
       " 'mouse deer',\n",
       " 'orang-utans',\n",
       " 'pangolin',\n",
       " 'porcupine',\n",
       " 'sambar deer',\n",
       " 'squirrel',\n",
       " 'sun bear',\n",
       " 'unknown'}"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "species_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "bawan_images"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Manually compiled list of species"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "101"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this list includes all the species names and typo variants I was able to find best-effort\n",
    "# can't use rat, cat, bat - these match too many strings\n",
    "\n",
    "species = ['nothing', 'bird', 'argus', 'crested fireback', 'grey headed fish eagle', \n",
    "         'bornean ground cuckoo', 'white headed fish eagle', 'babbler brown bird',\n",
    "           'camera set-up', 'humans', 'human', 'camera placing shots',\n",
    "           'civet', 'common palm civet', 'clouded leopard', 'deer', 'mongooe', 'pangolin', 'pig', 'porcupine', 'primate', 'sun bear', \n",
    "           'unclear', 'unknown',\n",
    "          'banded palm civet', 'bay cat', 'bearded pig', 'binturong', 'gibbon', 'great argus', 'leopard cat', 'long-tailed macaque',\n",
    "          'malay civet', 'marbled cat', 'moon rat', 'mouse deer', 'muntjac', 'orangutan', 'orang-utans', 'pig-tailed macaque', 'red langur', 'sambar deer',\n",
    "          'banded palm civet', 'banteng', 'bornean red muntja', 'macaque', 'squirrel',\n",
    "          'turtle', 'banded linsang', 'monitor lizard', 'flat-headed-cat', 'lizard', 'yellow muntjac',\n",
    "          'small toothed palm civet', 'st palm civet', 'small thoodhed palm civet', 'small thoothed palm civet', 'small thoodheed palm civet', \n",
    "           'small-thoodhed palm civet', 'small-toodhed palm civet', 'small-toothed palm civet',\n",
    "           \"storm's stork\", \"strom's strok\", 'storms stork', 'storm stork', 'storms st', 'raptor',\n",
    "          'rodent', 'reptile', 'orang utan', 'yellow throated marten', 'bats', \n",
    "           'collard mongoose', 'collared mongoose',\n",
    "          'brown wood owl', 'fairy pitta', 'pig-t-macaque',\n",
    "          'sunbear', 'eagle', \n",
    "           'short tailed mongoose', 'short-t-mongoose', 'short-t mongoose', 'short tailed-mongoose', 'short taled-mongoose', 's-t mongoose', 'short-tailed-mongoose', 'short - tailed mongoose', 'short tailet mongoose', 'short-tailed mongoose', \n",
    "           'mongoose', \n",
    "           'treeshrew', 'marble cat', 'flat headed cat',\n",
    "          'hunter dog', 'dog hunter', 'short-t-monggoose',\n",
    "          'reed-leaf monkey', 'red leaf monkey', 'monkey', 'otters civet', 'otter civet', 'butterfly'] # there are no \"empty\" in the file names anywhere, and there are very few \"nothing\"\n",
    "species = set(species)\n",
    "len(species)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 279638/279638 [00:09<00:00, 28101.39it/s]\n"
     ]
    }
   ],
   "source": [
    "# the dict in this direction is for verification only \n",
    "\n",
    "species_to_image_paths = defaultdict(list)\n",
    "\n",
    "for p in tqdm(image_paths_original):\n",
    "    candidate_species = []\n",
    "    for s in species:\n",
    "        if s in p.lower():\n",
    "            candidate_species.append(s)\n",
    "            \n",
    "    if len(candidate_species) == 0:\n",
    "        continue\n",
    "    \n",
    "    if 'nothing' in candidate_species:\n",
    "        species_to_image_paths['nothing'].append(p)\n",
    "    \n",
    "    most_specific_species = None\n",
    "    max_len = 0\n",
    "    for c in candidate_species:\n",
    "        if len(c) > max_len:\n",
    "            max_len = len(c)\n",
    "            most_specific_species = c\n",
    "            \n",
    "    species_to_image_paths[most_specific_species].append(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "99"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(species_to_image_paths.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3164"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(species_to_image_paths['nothing']) # too many wrongly spelt short-tailed mongoose... They'll be in \"mongoose\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'20190624cameratraps/images/OUTROP CAMERA TRAPS ALL/CAMERA TRAPS SABANGAU/CAMERA TRAPS 2009/other stuff/CAMERA TRAPS 2009 up to date/KM 2 x Railway/Km2 x Railway Download 23-05-09 NOTHING macaque only/CDY_0031.JPG' in species_to_image_paths['nothing']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "image_path_to_species = {}\n",
    "\n",
    "for s, list_paths in species_to_image_paths.items():\n",
    "    for p in list_paths:\n",
    "        image_path_to_species[p] = s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17279"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(image_path_to_species)  # about 1k are \"nothing\" labeled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sample(image_path_to_species.items(), 5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Make CCT database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "info = {\n",
    "  # Required\n",
    "  \"version\" : '20190825',\n",
    "  \"description\" : 'Images sent by Susan Cheyne from Borneo Nature Foundation',\n",
    "  \n",
    "  # Optional\n",
    "  \"year\" : 2019,\n",
    "  \"contributor\" : 'Images sent by Susan Cheyne from Borneo Nature Foundation. DB created by Siyu Yang',\n",
    "  \"date_created\" : str(datetime.today().date())\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'contributor': 'Images sent by Susan Cheyne from Borneo Nature Foundation. DB created by Siyu Yang',\n",
       " 'date_created': '2019-08-25',\n",
       " 'description': 'Images sent by Susan Cheyne from Borneo Nature Foundation',\n",
       " 'version': '20190825',\n",
       " 'year': 2019}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "['argus',\n",
       " 'babbler brown bird',\n",
       " 'banded linsang',\n",
       " 'banded palm civet',\n",
       " 'banteng',\n",
       " 'bats',\n",
       " 'bay cat',\n",
       " 'bearded pig',\n",
       " 'binturong',\n",
       " 'bird',\n",
       " 'bornean ground cuckoo',\n",
       " 'bornean red muntja',\n",
       " 'brown wood owl',\n",
       " 'butterfly',\n",
       " 'camera placing shots',\n",
       " 'camera set-up',\n",
       " 'civet',\n",
       " 'clouded leopard',\n",
       " 'collard mongoose',\n",
       " 'collared mongoose',\n",
       " 'common palm civet',\n",
       " 'crested fireback',\n",
       " 'deer',\n",
       " 'dog hunter',\n",
       " 'eagle',\n",
       " 'fairy pitta',\n",
       " 'flat headed cat',\n",
       " 'flat-headed-cat',\n",
       " 'gibbon',\n",
       " 'great argus',\n",
       " 'grey headed fish eagle',\n",
       " 'humans',\n",
       " 'hunter dog',\n",
       " 'leopard cat',\n",
       " 'long-tailed macaque',\n",
       " 'macaque',\n",
       " 'malay civet',\n",
       " 'marble cat',\n",
       " 'marbled cat',\n",
       " 'mongooe',\n",
       " 'mongooes',\n",
       " 'mongoose',\n",
       " 'monitor lizard',\n",
       " 'monkey',\n",
       " 'moon rat',\n",
       " 'mouse deer',\n",
       " 'muntjac',\n",
       " 'nothing',\n",
       " 'orang utan',\n",
       " 'orang-utans',\n",
       " 'orangutan',\n",
       " 'otter civet',\n",
       " 'otters civet',\n",
       " 'pangolin',\n",
       " 'pig',\n",
       " 'pig-t-macaque',\n",
       " 'pig-tailed macaque',\n",
       " 'porcupine',\n",
       " 'primate',\n",
       " 'raptor',\n",
       " 'red langur',\n",
       " 'red leaf monkey',\n",
       " 'reed-leaf monkey',\n",
       " 'reptile',\n",
       " 'rodent',\n",
       " 's-t mongoose',\n",
       " 'sambar deer',\n",
       " 'short - tailed mongoose',\n",
       " 'short tailed mongoose',\n",
       " 'short tailed-mongoose',\n",
       " 'short tailet mongoose',\n",
       " 'short taled-mongoose',\n",
       " 'short-t mongoose',\n",
       " 'short-t-monggoose',\n",
       " 'short-t-mongoose',\n",
       " 'short-tailed mongoose',\n",
       " 'short-tailed-mongoose',\n",
       " 'small thoodhed palm civet',\n",
       " 'small thoodheed palm civet',\n",
       " 'small thoothed palm civet',\n",
       " 'small toothed palm civet',\n",
       " 'small-thoodhed palm civet',\n",
       " 'small-toodhed palm civet',\n",
       " 'small-toothed palm civet',\n",
       " 'squirrel',\n",
       " 'st palm civet',\n",
       " 'storm stork',\n",
       " \"storm's stork\",\n",
       " 'storms st',\n",
       " 'storms stork',\n",
       " \"strom's strok\",\n",
       " 'sun bear',\n",
       " 'sunbear',\n",
       " 'treeshrew',\n",
       " 'turtle',\n",
       " 'unclear',\n",
       " 'unknown',\n",
       " 'white headed fish eagle',\n",
       " 'yellow muntjac',\n",
       " 'yellow throated marten']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(species_to_image_paths)\n",
    "sorted(species_to_image_paths.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_map = {\n",
    "    'nothing': 'empty',\n",
    "    \"strom's strok\": \"storm's stork\",\n",
    "    \"storms stork\": \"storm's stork\",\n",
    "    \"storms st\": \"storm's stork\",\n",
    "    \"storm stork\": \"storm's stork\", \n",
    "    'pig-t-macaque': 'pig-tailed macaque',\n",
    "    \"unclear\": \"unknown\",\n",
    "    'camera set-up': 'human',\n",
    "    'humans': 'human',\n",
    "    'orang-utans': 'orangutan',\n",
    "    'orang utan': 'orangutan',\n",
    "    'sunbear': 'sun bear',\n",
    "    'marble cat': 'marbled cat',\n",
    "    'flat-headed-cat': 'flat headed cat',\n",
    "    'dog hunter': 'hunter dog',\n",
    "    'small thoodhed palm civet': 'small toothed palm civet',\n",
    "    'short tailed-mongoose': 'short-tailed mongoose',\n",
    "    'short taled-mongoose': 'short-tailed mongoose',\n",
    "    'short-t mongoose': 'short-tailed mongoose',\n",
    "    'short-t-monggoose': 'short-tailed mongoose',\n",
    "    'short tailed mongoose': 'short-tailed mongoose',\n",
    "    'short-t-mongoose': 'short-tailed mongoose',\n",
    "    'short-tailed-mongoose': 'short-tailed mongoose',\n",
    "    's-t mongoose': 'short-tailed mongoose',\n",
    "    'short - tailed mongoose': 'short-tailed mongoose',\n",
    "    'short tailet mongoose': 'short-tailed mongoose',\n",
    "    'mongooe': 'mongoose',\n",
    "    'reed-leaf monkey': 'red leaf monkey',\n",
    "    'otters civet': 'otter civet',\n",
    "    'small toothed palm civet': 'small-toothed palm civet', \n",
    "    'st palm civet': 'small-toothed palm civet', \n",
    "    'small thoodhed palm civet': 'small-toothed palm civet', \n",
    "    'small thoothed palm civet': 'small-toothed palm civet', \n",
    "    'small thoodheed palm civet': 'small-toothed palm civet',\n",
    "    'small-thoodhed palm civet': 'small-toothed palm civet',\n",
    "    'small-toodhed palm civet': 'small-toothed palm civet',\n",
    "    'collard mongoose': 'collared mongoose'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17279"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "65"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "{'argus',\n",
       " 'babbler brown bird',\n",
       " 'banded linsang',\n",
       " 'banded palm civet',\n",
       " 'banteng',\n",
       " 'bats',\n",
       " 'bay cat',\n",
       " 'bearded pig',\n",
       " 'binturong',\n",
       " 'bird',\n",
       " 'bornean ground cuckoo',\n",
       " 'bornean red muntja',\n",
       " 'brown wood owl',\n",
       " 'butterfly',\n",
       " 'camera placing shots',\n",
       " 'civet',\n",
       " 'clouded leopard',\n",
       " 'collared mongoose',\n",
       " 'common palm civet',\n",
       " 'crested fireback',\n",
       " 'deer',\n",
       " 'eagle',\n",
       " 'empty',\n",
       " 'fairy pitta',\n",
       " 'flat headed cat',\n",
       " 'gibbon',\n",
       " 'great argus',\n",
       " 'grey headed fish eagle',\n",
       " 'human',\n",
       " 'hunter dog',\n",
       " 'leopard cat',\n",
       " 'long-tailed macaque',\n",
       " 'macaque',\n",
       " 'malay civet',\n",
       " 'marbled cat',\n",
       " 'mongoose',\n",
       " 'monitor lizard',\n",
       " 'monkey',\n",
       " 'moon rat',\n",
       " 'mouse deer',\n",
       " 'muntjac',\n",
       " 'orangutan',\n",
       " 'otter civet',\n",
       " 'pangolin',\n",
       " 'pig',\n",
       " 'pig-tailed macaque',\n",
       " 'porcupine',\n",
       " 'primate',\n",
       " 'raptor',\n",
       " 'red langur',\n",
       " 'red leaf monkey',\n",
       " 'reptile',\n",
       " 'rodent',\n",
       " 'sambar deer',\n",
       " 'short-tailed mongoose',\n",
       " 'small-toothed palm civet',\n",
       " 'squirrel',\n",
       " \"storm's stork\",\n",
       " 'sun bear',\n",
       " 'treeshrew',\n",
       " 'turtle',\n",
       " 'unknown',\n",
       " 'white headed fish eagle',\n",
       " 'yellow muntjac',\n",
       " 'yellow throated marten'}"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# redo image_path_to_species to uniformize species names\n",
    "for p, s in image_path_to_species.items():\n",
    "    if s in cat_map:\n",
    "        image_path_to_species[p] = cat_map[s]\n",
    "\n",
    "len(image_path_to_species)\n",
    "\n",
    "valid_species = set(image_path_to_species.values())\n",
    "len(valid_species)\n",
    "valid_species"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "categories = {}\n",
    "i = 1\n",
    "for s in sorted(list(valid_species)):\n",
    "    categories[s] = i\n",
    "    i += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17279"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "17279"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "images = []\n",
    "annotations = []\n",
    "\n",
    "for p, s in image_path_to_species.items():\n",
    "    if s in cat_map:\n",
    "        s = cat_map[s]\n",
    "        \n",
    "    image_id = str(uuid.uuid4())\n",
    "    anno_id = str(uuid.uuid4())\n",
    "    category_id = categories[s]\n",
    "    \n",
    "    images.append({\n",
    "        'id': image_id,\n",
    "        'file_name': p\n",
    "    })\n",
    "    annotations.append({\n",
    "        'id': anno_id,\n",
    "        'image_id': image_id,\n",
    "        'category_id': category_id\n",
    "    })\n",
    "len(images)\n",
    "len(annotations)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "images[100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'category_id': 46,\n",
       " 'id': '55f2a527-a0bd-4df1-8b93-3dc6ef37eec9',\n",
       " 'image_id': 'dab5bc96-54fa-49c1-b73a-1ca39c47eef6'}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "annotations[100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "categories_final = []\n",
    "for name, i in categories.items():\n",
    "    categories_final.append({\n",
    "        'id': i,\n",
    "        'name': name\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "cct_db = {\n",
    "    'info': info,\n",
    "    'images': images,\n",
    "    'annotations': annotations,\n",
    "    'categories': categories_final\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/Source/temp_data/CameraTrap/engagements/BNF/20190624and0815/BNF_20190624and0815_20190825a.json', 'w') as f:\n",
    "    json.dump(cct_db, f, indent=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Split detector results into labeled and unlabeled portions\n",
    "\n",
    "There was no reliable labels for \"empty\" images."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/Source/temp_data/CameraTrap/engagements/BNF/20190624and0815/8714_detections_bnf0624and0815_20190815182245_refiltered.json') as f:\n",
    "    all_res = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "278873"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(all_res['images'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17279"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "images_with_label = set(image_path_to_species.keys())\n",
    "len(images_with_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "17279"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "261594"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "labeled_im_res = []\n",
    "unlabeled_im_res = []\n",
    "\n",
    "for res in all_res['images']:\n",
    "    if res['file'] in images_with_label:\n",
    "        labeled_im_res.append(res)\n",
    "    else:\n",
    "        unlabeled_im_res.append(res)\n",
    "len(labeled_im_res)\n",
    "len(unlabeled_im_res)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['info', 'detection_categories', 'images'])"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_res.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "labeled_res = {\n",
    "    'info': all_res['info'],\n",
    "    'detection_categories': all_res['detection_categories'],\n",
    "    'images': labeled_im_res\n",
    "}\n",
    "\n",
    "unlabeled_im_res = {\n",
    "    'info': all_res['info'],\n",
    "    'detection_categories': all_res['detection_categories'],\n",
    "    'images': unlabeled_im_res\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/Source/temp_data/CameraTrap/engagements/BNF/20190624and0815/8714_detections_bnf0624and0815_20190815182245_refiltered_labeled.json', 'w') as f:\n",
    "    json.dump(labeled_res, f, indent=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/Users/siyuyang/Source/temp_data/CameraTrap/engagements/BNF/20190624and0815/8714_detections_bnf0624and0815_20190815182245_refiltered_unlabeled.json', 'w') as f:\n",
    "    json.dump(unlabeled_im_res, f, indent=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:tensorflow]",
   "language": "python",
   "name": "conda-env-tensorflow-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
